Tph2 Fiber Analysis

Introduction

This notebook takes in exported CellProfiler data along with a metadata table with information about treatment for each mouse and outputs graphs and statistics about the fibers.

Setup

library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.1.2
library(here)
here() starts at /Users/rsenft/Documents/GitHub/RebeccaSenft_Projects/002_Dymecki_neuro/Batch_3_fibers/Fibers_analysis
library(dplyr)
Warning: package 'dplyr' was built under R version 4.1.2

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(RColorBrewer)
Warning: package 'RColorBrewer' was built under R version 4.1.2
library(readr)
Warning: package 'readr' was built under R version 4.1.2
library(DT)
Warning: package 'DT' was built under R version 4.1.2

Get data

Note that the CSV had to be corrected because there is one filename with ROB instead of ROb and because not all the metadata is extracted correctly (inconsistent filenames), so I manually corrected this in Excel using Flash Fill and correcting the mispelling and saved this as Tph2_Per_Image_metadata_corrected.csv.

path_to_csv_folder = here("csvs")
save_path <- path_to_csv_folder
img_filename="Tph2_Per_Image_metadata_corrected.csv"
sc_filename="Tph2_Per_BorderedCells.csv"

metadata_filename="Litter-groups-A-through-ii.csv" #note I gave this headings

#load data and metadata for images
image_df <- read.csv(here(path_to_csv_folder,img_filename), check.names=FALSE)
metadata <- read.csv(here(path_to_csv_folder,metadata_filename), check.names = FALSE)
image_df_combined <- merge(image_df, metadata, by.x="Image_Metadata_Litter", by.y="Litter")

#load data for single cells
sc_df <- read.csv(here(path_to_csv_folder,sc_filename), check.names=FALSE)
sc_df_combined <- merge(sc_df, image_df_combined, by="ImageNumber")

We also need to compute the average length of a branch, taken as the total skeleton length / (number of trunks + number of non-trunk branches).

sc_df_combined <- sc_df_combined %>% 
  mutate(BorderedCells_ObjectSkeleton_avg_branch_length = BorderedCells_ObjectSkeleton_TotalObjectSkltnLngth_MrphlgclSkltn/(BorderedCells_ObjectSkeleton_NumberNonTrunkBranchs_MrphlgclSkltn+BorderedCells_ObjectSkeleton_NumberTrunks_MorphologicalSkeleton))

Summarize skeleton features

Summarize for each region, mouse, treatment (region_df) and for each mouse and treatment group (total_df).

# Select only columns with fiber measurements and create summary table:
cols2summarize <- names(sc_df_combined)[grepl("*ObjectSkeleton*", names(sc_df_combined))] 
# make Inf NaN
sc_df_combined[sc_df_combined==Inf]<- NaN
#get morphological skeleton features

region_df <- sc_df_combined %>% 
  group_by(ExpGroup, Image_Metadata_Mouse, Image_Metadata_Region) %>%
  summarize(across(all_of(cols2summarize), mean, na.rm=TRUE))
`summarise()` has grouped output by 'ExpGroup', 'Image_Metadata_Mouse'. You can
override using the `.groups` argument.
total_df <- region_df %>% 
  group_by(ExpGroup, Image_Metadata_Mouse) %>%
  summarize(across(all_of(cols2summarize), mean, na.rm=TRUE))
`summarise()` has grouped output by 'ExpGroup'. You can override using the
`.groups` argument.

Plot skeleton features

Plot the five features by region and by experimental group ExpGroup. First, we set up some plotting colors and descriptions for each variable:

pal=c("#FFE66D",  "#C2CAE8", "#FF4365", "#00D9C0", "#F26419", "#86BBD8")
pal_dark=c("#A38800",  "#273568","#FF6B6B", "#FF4365", "#00D9C0", "#F26419", "#86BBD8")

measurements = c("BorderedCells_ObjectSkeleton_NumberBranchEnds_MorphologicalSkltn", 
                 "BorderedCells_ObjectSkeleton_NumberNonTrunkBranchs_MrphlgclSkltn",
                 "BorderedCells_ObjectSkeleton_NumberTrunks_MorphologicalSkeleton",
                 "BorderedCells_ObjectSkeleton_TotalObjectSkltnLngth_MrphlgclSkltn",
                 "BorderedCells_ObjectSkeleton_avg_branch_length")
desc = c("Number of Branch Termini",
         "Number of Trunk Branches",
         "Number of Trunks",
         "Total Skeleton Length",
         "Mean Process Length")

Here is the function for our box and whisker plot:

plot_boxwhisker <- function(data, measurements, desc, index, x_var){
  meas<- measurements[index]
  data<- data %>% filter(is.finite(get(meas)))
  p <- ggplot(data, aes(x=get(x_var), y=get(meas), fill=ExpGroup))+
  scale_fill_manual(values=pal)+
  geom_boxplot(position = position_dodge(0.9))+
  geom_jitter(position = position_dodge(0.9), alpha=0.8, aes(color=ExpGroup))+
  scale_color_manual(values=pal_dark)+
  stat_summary(fun=mean, geom="point", shape=18,
               position = position_dodge(0.9),
               size=3, color="firebrick")+
  #geom_jitter(position = position_dodge(0.9), alpha=0.4)+
  labs(title=paste0("Average ",desc[index]), 
       subtitle="Each dot represents 1 animal",
       caption="Source: Batch 3",
       x="Region",
       y=desc[index])+
  theme(text = element_text(size = 20))
  return(p)
}

Now we apply that function to each of the 5 variables we want to plot:

p_list=list()
for (i in 1:length(measurements)) {
  p_list[[i]] <- plot_boxwhisker(region_df, measurements, desc, i, x_var="Image_Metadata_Region")
  plot(p_list[[i]])
}

(a)

(b)

(c)

(d)

(e)

Figure 1: No effect of experimental group on fiber measures.

In examining Mean Process Length, it almost looks like there could be a difference associated with experimental group in R4. We can look at this further by conducting a t-test. RA and IH are not statistically different:

#t test
y_var="BorderedCells_ObjectSkeleton_avg_branch_length"
grp1 <- region_df %>% filter(Image_Metadata_Region=="R4", ExpGroup=="11% IH") %>% pull(y_var)
grp2 <- region_df %>% filter(Image_Metadata_Region=="R4", ExpGroup=="RA") %>% pull(y_var)
t.test(grp1, grp2)

    Welch Two Sample t-test

data:  grp1 and grp2
t = -1.8325, df = 22.148, p-value = 0.08035
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -3.361794  0.207057
sample estimates:
mean of x mean of y 
 15.60552  17.18289 

Now let’s just look overall at RA vs. IH mice:

p_list=list()
for (i in 1:length(measurements)) {
  p_list[[i]] <- plot_boxwhisker(total_df, measurements, desc, i, x_var="ExpGroup")
  plot(p_list[[i]])
}

#cowplot::plot_grid(plotlist=p_list, ncol=2, scale=1)

(a)

(b)

(c)

(d)

(e)

Figure 2: No effect of experimental group on fiber measures.

Instead of viewing each animal as a datapoint, we can also look at the distribution of all cells. Here’s our plotting function:

plot_distribution <- function(data, measurements, desc, index, x_var, facetby){
  p<- ggplot(sc_df_combined, aes(x=get(measurements[index]), fill=ExpGroup, color=ExpGroup))+
  geom_density(alpha=0.7, adjust=1, color=NaN)+
  scale_fill_manual(values=pal)+
  geom_density(alpha=0.2, adjust=1, fill=NaN)+
  scale_color_manual(values=pal_dark)+
  facet_wrap(~get(facetby))+
  labs(title=paste0("Distribution of ",desc[index], " by Region, treatment"), 
       caption="Source: Batch 3",
       x=desc[index])+
  theme_bw()
  print(p)
}

Let’s make some distribution plots:

for (i in 1:length(measurements)) {
  plot_distribution(sc_df_combined, measurements, desc, i, x_var="ExpGroup", facetby="Image_Metadata_Region")
}

Warning: Removed 2637 rows containing non-finite values (stat_density).
Removed 2637 rows containing non-finite values (stat_density).

Make summary table (n counts for mouse, region, treatment)

tally_df <- sc_df_combined %>% 
  group_by(Image_Metadata_Mouse, Image_Metadata_Region, ExpGroup, ImageNumber) %>%
  tally(name="n_cells") %>% 
  select(-ImageNumber) %>% 
  group_by(Image_Metadata_Mouse, Image_Metadata_Region, ExpGroup) %>% 
  add_tally(name="n_images") %>%
  summarize(n_cells=sum(n_cells), n_images=median(n_images)) %>%
  mutate(n_cells_per_image=n_cells/n_images)
`summarise()` has grouped output by 'Image_Metadata_Mouse',
'Image_Metadata_Region'. You can override using the `.groups` argument.
datatable(tally_df, 
          style="default",
          rownames=FALSE, 
          colnames=c("Mouse", "Region", "Group", "n cells", "n images", "n cells per image"),
          extensions = 'Buttons',
          fillContainer = FALSE,
          options = list(
            pageLength = 20, 
            dom = 'Bfrtip',
            buttons = c('copy', 'print', 'csv', 'excel', 'pdf')
            )
          )
#total_count_RA <- sc_df_combined %>% filter(ExpGroup=="RA") %>% count() %>% pull()
#total_count_IH <- sc_df_combined %>% filter(ExpGroup=="11% IH") %>% count() %>% pull()